The required libraries are loaded - RomicsProcessor written by Geremy Clair (2020) is used to perform trackable transformation and statistics to the dataset - proteinminion written by Geremy Clair (2020) is used to extract fasta information and to perform gene ontology and KEGG pathways enrichement analysis (2020)
library("RomicsProcessor")
library("proteinminion")
library("DT") #for the rendering of the enrichment tables
Using the package ‘Protein Mini-on’ (Geremy Clair 2020, in prep.), The fasta file was downloaded from Unipro for the human and bovine proteome on the Jun 15th, 2020
if(!file.exists("./03_output_files/Uniprot_Homo_sapiens_proteome_UP000005640_2021_03_23.fasta")){
download_UniProtFasta(proteomeID = "UP000005640",reviewed = F,export = TRUE, file="./03_output_files/Uniprot_Homo_sapiens_proteome_UP000005640_2021_03_23.fasta")
UniProtFasta_info<-UniprotFastaParser(file = "./03_output_files/Uniprot_Homo_sapiens_proteome_UP000005640_2021_03_23.fasta")
write.csv(UniProtFasta_info, "./03_output_files/UniProtFasta_info.csv")
}
For each entry, ‘Protein Mini-On’ was use to download Gene Ontology (GO) terms and KEGG ids associated with the proteins. This upload was performed the exact same day as the download of the fasta file was done to ensure that the IDs will be identical as the ones present in the fasta file used).
if(file.exists("./03_output_files/UniprotTable_Homo_sapiens_proteome_UP000005640_2020_07_29.csv")){
UniProtTable<-read.csv("./03_output_files/UniprotTable_Homo_sapiens_proteome_UP000005640_2020_07_29.csv")
}else{
download_UniProtTable(proteomeID = "UP000005640",reviewed = F)
write.csv(UniProtTable,("./03_output_files/UniprotTable_Homo_sapiens_proteome_UP000005640_2020_07_29.csv"),row.names=FALSE)
}
‘Protein-Mini-on’ was then used to generate tables containing the list of GOs, KEGG and reactome pathways and their associated protein IDs
if(file.exists("./03_output_files/UniProtTable_GO.csv")){
UniProtTable_GO<-read.csv(file="./03_output_files/UniProtTable_GO.csv")
UniProtTable_KEGG<-read.csv(file="./03_output_files/UniProtTable_KEGG.csv")
UniProtTable_REACTOME<-read.csv(file="./03_output_files/UniProtTable_REACTOME.csv")
}else{
proteinminion::generate_UniProtTables()
write.csv(UniProtTable_GO,file="./03_output_files/UniProtTable_GO.csv",row.names=FALSE)
write.csv(UniProtTable_KEGG,file="./03_output_files/UniProtTable_KEGG.csv",row.names=FALSE)
write.csv(UniProtTable_REACTOME,file="./03_output_files/UniProtTable_REACTOME.csv",row.names=FALSE)
}
The LFQ data contained in the protein table was loaded, the corresponding metadata was loaded
data<-extractMaxQuant("./01_source_files/proteinGroups.txt",quantification_type = "LFQ",cont.rm = T,site.rm = T,rev.rm = T)
## [1] "115 Proteins were removed (protein(s) only identified by site,contaminant(s),reverse hit(s))"
## [1] "LFQ quantification was used"
IDsdetails<-extractMaxQuantIDs("./01_source_files/proteinGroups.txt",cont.rm = T,site.rm = T,rev.rm = T)
## [1] "115 Proteins were removed (protein(s) only identified by site,contaminant(s),reverse hit(s))"
IDsdetails<-cbind(UniProt_Name=sub(".*\\|","",IDsdetails$protein.ids), IDsdetails)
IDsdetails$representative_protein<-gsub(";.*","",IDsdetails$UniProt_Name)
IDsdetails<-merge(IDsdetails,UniProtTable,by.x="representative_protein",by.y="Uniprot_Accession")
IDsdetails$Gene<-IDsdetails$Gene_Name
IDsdetails$Gene[IDsdetails$Gene==""]<-paste0("undefined_gene_name_",IDsdetails$representative_protein[IDsdetails$Gene==""])
IDsdetails$representative_gene<-gsub(" .*","",IDsdetails$Gene)
IDsdetails<-cbind(IDsdetails[,2:ncol(IDsdetails)],representative_protein=IDsdetails[,1])
metadata<- read.csv(file = "./01_source_files/metadata.csv")
colnames(metadata)<- sub("p","",colnames(data))
colnames(metadata)<-tolower(colnames(metadata))
colnames(metadata)<-sub("x", "",colnames(metadata))
write.csv(IDsdetails,"MaxQuantIDS.csv")
First let’s evaluate the normality of the surface_density and of the percent_tissue
surface_density<-as.numeric(metadata[7,-1])
hist(surface_density,breaks = 8)
t<-shapiro.test(surface_density)
print(paste0("The surface density normality Shapiro-Wilk p=",t$p.value))
## [1] "The surface density normality Shapiro-Wilk p=0.149765528866248"
percent_tissue<-as.numeric(metadata[8,-1])
hist(percent_tissue,breaks = 8)
t<-shapiro.test(percent_tissue)
print(paste0("The surface density normality Shapiro-Wilk p=",t$p.value))
## [1] "The surface density normality Shapiro-Wilk p=0.0164815885642291"
The surface density was non normal we log-transformed the value and tested for normality again
log_surface_density<-log(surface_density)
hist(log_surface_density,breaks = 8)
t<-shapiro.test(log_surface_density)
print(paste0("The surface density normality Shapiro-Wilk p=",t$p.value))
## [1] "The surface density normality Shapiro-Wilk p=0.00164315430349359"
the log_surface_density was normal and was therefore added to the data
metadata<-rbind(metadata,c("log_surface_density",log_surface_density))
The data and metadata were placed in an romics_object, the sample names were retrieved from the metadata, the condition will be use for the coloring of the Figure and statistics
romics_proteins<- romicsCreateObject(data, metadata,main_factor = "disease_status",IDs = IDsdetails)
The missingness was evaluated for each channel/sample
romics_proteins<- romicsZeroToMissing(romics_proteins)
romicsPlotMissing(romics_proteins)
The proteins to be conserved for quantification were selected to contain at least 70% of complete values (3/4 samples) for a given condition, the overall missingness was evaluated after filtering.
romics_proteins<-romicsFilterMissing(romics_proteins, percentage_completeness = 70)
## [1] "1983 rows were removed for the data"
## [1] "Based on the minimum completeness set at 70%"
## [1] "at least the following number of sample(s) containing data was required:"
## Ctrl IPF
## 7 21
## [1] "1922/3905 features remained after filtering (49.22%)."
print(paste0(nrow(romics_proteins$data),"/", nrow(romics_proteins$original_data)," proteins remained after filtering", " (",round(nrow(romics_proteins$data)/nrow(romics_proteins$original_data)*100,2),"%)."))
## [1] "1922/3905 proteins remained after filtering (49.22%)."
romicsPlotMissing(romics_proteins)
As the same quantity of protein was labelled for each sample, the expectation is that the distribution of the protein abundance is centered, therefore a median centering was performed prior to plot again the distribution boxplots.
romics_proteins<-log2transform(romics_proteins)
romics_proteins<-medianCenterSample(romics_proteins)
distribBoxplot(romics_proteins)
The grouping of the samples by is checked by hierarchical clustering
romicsHclust(romics_proteins)
romicsHclust(romicsChangeFactor(romics_proteins,"disease_lvl"))
For some of the subsequent statistics imputations are required, we performed an imputation by assuming that the “non-detected” proteins were either low abundance or missing using the method developped by Tyranova et al. (PMID: 27348712). The gray distribution is the data distribution, the yellow distribution is the one for the random values used for imputation.
imputeMissingEval(romics_proteins,nb_stdev = 2,width_stdev = 0.5, bin=1)
romics_proteins<-imputeMissing(romics_proteins,nb_stdev = 2,width_stdev = 0.5)
A subset containing only the IPF samples was created
romics_IPF<-romicsSubset(romics_proteins,subset_vector = "IPF",type = "keep",by = "level",factor = "disease_status")
romics_IPF<-romicsChangeFactor(romics_IPF,"disease_lvl")
The hclust and PCA grouping were checked again after imputation
PCA_proteins<-romicsPCA(romics_proteins)
indPCAplot(romics_proteins, plotType = "percentage")
indPCAplot(romics_proteins, plotType = "individual",Xcomp=1,Ycomp =2)
indPCAplot(romics_proteins, plotType = "individual",Xcomp=1,Ycomp =3)
indPCA3D(romics_proteins)
indPCA3D(romicsChangeFactor(romics_proteins,"disease_lvl"))
indPCAplot(romics_IPF, plotType = "individual",Xcomp=1,Ycomp =2)
indPCAplot(romics_IPF, plotType = "individual",Xcomp=1,Ycomp =3)
indPCA3D(romics_IPF)
Let’s first evaluate the number of clusters that should be used for the samples
fviz_nbclust(t(romics_proteins$data), kmeans, method = "silhouette", k.max = 10) + theme_bw() + ggtitle("The Silhouette Plot")
fviz_nbclust(t(romics_proteins$data), kmeans, method = "wss")+
geom_vline(xintercept = 3, linetype = 2)
library(cluster)
gap_stat <- clusGap(t(romics_proteins$data), FUN = kmeans, nstart = 25,
K.max = 10, B = 10)
print(gap_stat, method = "firstmax")
## Clustering Gap statistic ["clusGap"] from call:
## clusGap(x = t(romics_proteins$data), FUNcluster = kmeans, K.max = 10, B = 10, nstart = 25)
## B=10 simulated reference sets, k = 1..10; spaceH0="scaledPCA"
## --> Number of clusters (method 'firstmax'): 3
## logW E.logW gap SE.sim
## [1,] 6.741987 6.975959 0.2339717 0.01618882
## [2,] 6.570249 6.868163 0.2979146 0.01210903
## [3,] 6.472499 6.815738 0.3432384 0.01254660
## [4,] 6.427634 6.768607 0.3409728 0.01488539
## [5,] 6.387180 6.724190 0.3370101 0.01531711
## [6,] 6.347208 6.681135 0.3339269 0.01541887
## [7,] 6.307536 6.638092 0.3305558 0.01609475
## [8,] 6.268323 6.596322 0.3279990 0.01603414
## [9,] 6.228380 6.554911 0.3265311 0.01719211
## [10,] 6.186224 6.511944 0.3257197 0.01709298
fviz_gap_stat(gap_stat)
A silhouette analysis indicates that two main clusters exist in the dataset
set.seed(seed = 1)
k<-kmeans(t(romics_proteins$data),2,iter.max = 50)
print("the clusters were the following")
## [1] "the clusters were the following"
print(k$cluster)
## X120_101 X120_107 X120_21 X127_10 X127_13 X127_26 X190_28 X190_56
## 1 1 1 2 1 1 1 1
## X190_59 X204_26 X204_48 X204_75 X224_41 X224_48 X224_84 X253_117
## 1 1 1 1 1 1 1 2
## X253_72 X253_86 X268_3 X268_70 X268_76 X319_42 X319_49 X319_6
## 2 1 1 1 1 1 1 1
## X323_37 X323_42 X323_71 X325_36 X325_61 X325_70 X145_94 X221_35
## 1 1 1 1 1 1 2 2
## X222_66 X248_95 X249_117 X274_87 X302_16 X304_71 X62_27 X94_124
## 2 2 2 2 2 2 2 2
print("Those were used to color the PCA plot")
## [1] "Those were used to color the PCA plot"
PC_coord<-data.frame(PCA_proteins$ind$coord[,1:2])
PC_coord<-cbind(PC_coord,k=as.character(k$cluster))
ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=k)) +
geom_point(size = 4)+
ggtitle("Principal Component Analysis (2 clusters)")+
scale_color_manual(values=c("dodgerblue", "orangered"))+
xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
theme_bw()
Now we will evaluate if there are difference in the clusters log(surface density)
#get the sample names for the samples in each cluster
Clust1 <-names(k$cluster[k$cluster==1])
Clust2 <-names(k$cluster[k$cluster==2])
Clust1 <-gsub("X","",Clust1)
Clust2 <-gsub("X","",Clust2)
#obtain the surface density of the clusters 1 and 2
Surf1 <- as.numeric(metadata[8,colnames(metadata) %in% Clust1])
Surf2 <- as.numeric(metadata[8,colnames(metadata) %in% Clust2])
t<-wilcox.test(Surf1,Surf2)
print(paste0("The t.test pvalue between Clust 1 and 2 is ",t$p.value))
## [1] "The t.test pvalue between Clust 1 and 2 is 0.00368418157314144"
Surf1 <- data.frame(values=Surf1,clust="1")
Surf2 <- data.frame(values=Surf2,clust="2")
Surf <- rbind(Surf1,Surf2)
ggplot(Surf, aes(x=clust, y=values,color=clust,fill=clust))+
geom_boxplot(width=0.4)+
geom_jitter(shape=16,size=4)+
scale_color_manual(values=c("dodgerblue", "orangered"))+
scale_fill_manual(values=alpha(c("dodgerblue", "orangered"), .3))+
theme_bw()
similarily the evaluation was made for the percent_tissue
#get the sample names for the samples in each cluster
Clust1 <-names(k$cluster[k$cluster==1])
Clust2 <-names(k$cluster[k$cluster==2])
Clust1 <-gsub("X","",Clust1)
Clust2 <-gsub("X","",Clust2)
#obtain the surface density of the clusters 1 and 2
Perc1 <- as.numeric(metadata[7,colnames(metadata) %in% Clust1])
Perc2 <- as.numeric(metadata[7,colnames(metadata) %in% Clust2])
t<-wilcox.test(Perc1,Perc2)
print(paste0("The wilcox.test pvalue between Clust 1 and 2 is ",t$p.value))
## [1] "The wilcox.test pvalue between Clust 1 and 2 is 7.18178337273514e-07"
Perc1 <- data.frame(values=Perc1,clust="1")
Perc2 <- data.frame(values=Perc2,clust="2")
Perc<- rbind(Perc1,Perc2)
ggplot(Perc, aes(x=clust, y=values,color=clust,fill=clust))+
geom_boxplot(width=0.4)+
geom_jitter(shape=16,size=4)+
scale_color_manual(values=c("dodgerblue", "orangered"))+
scale_fill_manual(values=alpha(c("dodgerblue", "orangered"), .3))+
theme_bw()
the elbow method and gap statistics indicate 3 clusters
k<-kmeans(t(romics_proteins$data),3)
print("the clusters were the following")
## [1] "the clusters were the following"
print(k$cluster)
## X120_101 X120_107 X120_21 X127_10 X127_13 X127_26 X190_28 X190_56
## 1 2 1 1 2 2 1 1
## X190_59 X204_26 X204_48 X204_75 X224_41 X224_48 X224_84 X253_117
## 2 2 2 1 2 1 1 1
## X253_72 X253_86 X268_3 X268_70 X268_76 X319_42 X319_49 X319_6
## 1 1 1 1 1 2 2 2
## X323_37 X323_42 X323_71 X325_36 X325_61 X325_70 X145_94 X221_35
## 2 1 2 2 2 2 3 3
## X222_66 X248_95 X249_117 X274_87 X302_16 X304_71 X62_27 X94_124
## 3 3 3 3 3 3 3 3
print("Those were used to color the PCA plot")
## [1] "Those were used to color the PCA plot"
PC_coord<-data.frame(PCA_proteins$ind$coord[,1:2])
PC_coord<-cbind(PC_coord,k=as.character(k$cluster))
ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=k)) +
geom_point(size = 4,colours=c("yellow","blue","orange"))+
ggtitle("Principal Component Analysis (3 clusters)")+
scale_color_manual(values=c("dodgerblue", "orangered", "gray50"))+
xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
theme_bw()
Now we will evaluate if there are difference in the clusters log surface density
#get the sample names for the samples in each cluster
Clust1 <-names(k$cluster[k$cluster==1])
Clust2 <-names(k$cluster[k$cluster==2])
Clust3 <-names(k$cluster[k$cluster==3])
Clust1 <-gsub("X","",Clust1)
Clust2 <-gsub("X","",Clust2)
Clust3 <-gsub("X","",Clust3)
#obtain the surface density of the clusters 1 and 2
Surf1 <- as.numeric(metadata[8,colnames(metadata) %in% Clust1])
Surf2 <- as.numeric(metadata[8,colnames(metadata) %in% Clust2])
Surf3 <- as.numeric(metadata[8,colnames(metadata) %in% Clust3])
t1_2<-wilcox.test(Surf1,Surf2)
print(paste0("The wilcox.test pvalue between Clust 1 and 2 is ",t1_2$p.value))
## [1] "The wilcox.test pvalue between Clust 1 and 2 is 0.967417297543179"
t2_3<-wilcox.test(Surf2,Surf3)
print(paste0("The wilcox.test pvalue between Clust 2 and 3 is ",t2_3$p.value))
## [1] "The wilcox.test pvalue between Clust 2 and 3 is 5.93497228306759e-05"
t1_3<-wilcox.test(Surf1,Surf3)
print(paste0("The wilcox.test pvalue between Clust 1 and 3 is ",t1_3$p.value))
## [1] "The wilcox.test pvalue between Clust 1 and 3 is 0.000531700094225333"
Surf1 <- data.frame(values=Surf1,clust="1")
Surf2 <- data.frame(values=Surf2,clust="2")
Surf3 <- data.frame(values=Surf3,clust="3")
Surf <- rbind(Surf1,Surf2,Surf3)
ggplot(Surf, aes(x=clust, y=values,color=clust,fill=clust))+
geom_boxplot(width=0.4)+
geom_jitter(shape=16,size=4)+
scale_color_manual(values=c("dodgerblue", "orangered","gray50"))+
scale_fill_manual(values=alpha(c("dodgerblue", "orangered","gray50"), .3))+
theme_bw()
similarily the evaluation was made for the percent_tissue
#get the sample names for the samples in each cluster
Clust1 <-names(k$cluster[k$cluster==1])
Clust2 <-names(k$cluster[k$cluster==2])
Clust3 <-names(k$cluster[k$cluster==3])
Clust1 <-gsub("X","",Clust1)
Clust2 <-gsub("X","",Clust2)
Clust3 <-gsub("X","",Clust3)
#obtain the surface density of the clusters 1 and 2
Perc1 <- as.numeric(metadata[7,colnames(metadata) %in% Clust1])
Perc2 <- as.numeric(metadata[7,colnames(metadata) %in% Clust2])
Perc3 <- as.numeric(metadata[7,colnames(metadata) %in% Clust3])
t1_2<-wilcox.test(Perc1,Perc2)
print(paste0("The wilcox.test pvalue between Clust 1 and 2 is ",t1_2$p.value))
## [1] "The wilcox.test pvalue between Clust 1 and 2 is 0.148479991170565"
t2_3<-wilcox.test(Perc2,Perc3)
print(paste0("The wilcox.test pvalue between Clust 2 and 3 is ",t2_3$p.value))
## [1] "The wilcox.test pvalue between Clust 2 and 3 is 1.22370562537476e-06"
t1_3<-wilcox.test(Perc1,Perc3)
print(paste0("The wilcox.test pvalue between Clust 1 and 3 is ",t1_3$p.value))
## [1] "The wilcox.test pvalue between Clust 1 and 3 is 2.44741125074952e-06"
Perc1 <- data.frame(values=Perc1,clust="1")
Perc2 <- data.frame(values=Perc2,clust="2")
Perc3 <- data.frame(values=Perc3,clust="3")
Perc <- rbind(Perc1,Perc2,Perc3)
ggplot(Perc, aes(x=clust, y=values,color=clust,fill=clust))+
geom_boxplot(width=0.4)+
geom_jitter(shape=16,size=4)+
scale_color_manual(values=c("dodgerblue", "orangered","gray50"))+
scale_fill_manual(values=alpha(c("dodgerblue", "orangered","gray50"), .3))+
theme_bw()
metadata<-data.frame(t(romics_proteins$metadata))
PC_coord<-data.frame(PCA_proteins$ind$coord[,1:2])
PC_coord<-cbind(PC_coord,log_surface_density=as.numeric(metadata$log_surface_density),percent_tissue=metadata$percent_tissue, disease_status=metadata$disease_status, disease_lvl=metadata$disease_lvl)
for (i in 1:4){PC_coord[,i]<-as.numeric(PC_coord[,i])}
library("viridis")
ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=log_surface_density)) +
geom_point(size = 4)+scale_color_gradient(low="blue", high="red")+
geom_text(label=round(PC_coord$log_surface_density,3),position = position_nudge(x = 7))+
ggtitle("Principal Component Analysis (Surface density)") +
xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
theme_bw()+ scale_colour_viridis()
ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=percent_tissue)) +
geom_point(size = 4)+scale_color_gradient(low="blue", high="red")+
geom_text(label=round(PC_coord$percent_tissue,3),position = position_nudge(x = 7))+
ggtitle("Principal Component Analysis (Percent_tissue)") +
xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
theme_bw()+ scale_colour_viridis()
ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=disease_status))+
geom_point(size = 4)+
ggtitle("Principal Component Analysis") +
xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
theme_bw()
ggplot(PC_coord, aes(x=Dim.1,y=Dim.2,color=disease_lvl))+
geom_point(size = 4)+
ggtitle("Principal Component Analysis") +
xlab(paste0("PC1(",round(as.numeric(PCA_proteins$eig[1,2]),2),"%)"))+
ylab(paste0("PC2(",round(as.numeric(PCA_proteins$eig[2,2]),2),"%)"))+
theme_bw()
For all cores we performed a trajectory analysis using the package
SLICER
library("SLICER")
library("lle")
table_data<-t(romics_proteins$data)
proteins <- select_genes(table_data)
k = select_k(table_data[,proteins], kmin=2)
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
## finding neighbours
## calculating weights
## computing coordinates
traj_lle = lle(table_data[,proteins], m=2, k)$Y
## finding neighbours
## calculating weights
## computing coordinates
print("the points of the trajectory are plotted")
## [1] "the points of the trajectory are plotted"
traj_graph = conn_knn_graph(traj_lle,5)
ends = find_extreme_cells(traj_graph, traj_lle)
start = 1
print("The trajectory connections were plotted")
## [1] "The trajectory connections were plotted"
distances = graph_process_distance(traj_graph,embedding = traj_lle,start = 1)
traj_lle_for_graph = as.data.frame(traj_lle)
colnames(traj_lle_for_graph) = c("Manifold_dim_1","Manifold_dim_2")
traj_lle_for_graph$Manifold_dim_1 = as.numeric(traj_lle_for_graph$Manifold_dim_1)
traj_lle_for_graph$Manifold_dim_2 = as.numeric(traj_lle_for_graph$Manifold_dim_2)
traj_lle_for_graph<- cbind(ROI= rownames(table_data), disease_lvl= t(romics_proteins$metadata[3,]), traj_lle_for_graph, branches = assign_branches(traj_graph,start = 1))
print("The plots is colored by the log(surface density")
## [1] "The plots is colored by the log(surface density"
ggplot(traj_lle_for_graph,aes(x=Manifold_dim_1,Manifold_dim_2,color=disease_lvl))+ geom_point(size=1,)+ geom_text(label=traj_lle_for_graph$disease_lvl)+theme_ROP()
DT::datatable(traj_lle_for_graph)
romics_proteins<-romicsMean(romics_proteins)
## [1] "The Statistics layer was added to your object"
## [1] "Means columns (*_mean) were added to the statistics"
romics_proteins<-romicsSd(romics_proteins)
## [1] "The standard deviation columns (*_sd) were added to the statistics"
First we will evaluate the proteins differentially expressed overall between the IPF and control donors
romics_proteins<-romicsANOVA(romics_proteins)
## [1] "The ANOVA columns (ANOVA_p and ANOVA_padj) were added to the statistics"
print(paste0(sum(romics_proteins$statistics$ANOVA_p<0.05), " proteins had an ANOVA p<0.05."))
## [1] "1304 proteins had an ANOVA p<0.05."
pval<-data.frame(ids=rownames(romics_proteins$statistics), p=romics_proteins$statistics$ANOVA_p)
ggplot(pval, aes(p)) + geom_histogram(binwidth = 0.01)+theme_ROP()+ggtitle("ANOVA p frequency plot")+geom_vline(xintercept=0.05,linetype="dashed", color = "red")
print(paste0(sum(romics_proteins$statistics$ANOVA_padj<0.05), " proteins had an ANOVA padjusted<0.05."))
## [1] "1227 proteins had an ANOVA padjusted<0.05."
pval<-data.frame(ids=rownames(romics_proteins$statistics), p=romics_proteins$statistics$ANOVA_padj)
ggplot(pval, aes(p)) + geom_histogram(binwidth = 0.01)+theme_ROP()+ggtitle("ANOVA padj frequency plot")+geom_vline(xintercept=0.05,linetype="dashed", color = "red")
A heatmap depicting the proteins passing an ANOVA p<0.05 is plotted, the clusters obtained were saved in the statistics.
romicsHeatmap(romics_proteins,variable_hclust_number = 2,ANOVA_filter = "p", p=0.05)
romics_proteins<-romicsVariableHclust(romics_proteins,clusters = 2,ANOVA_filter = "p",p= 0.05,plot = F)
## [1] "The columns hclust_clusters was added to the statistics"
romics_proteins<-romicsZscores(romics_proteins)
## [1] "Z_score_ columns were added to the statistics"
Ttests were also calculated
romics_proteins<-romicsTtest(romics_proteins,var.equal = F)
## [1] "T_test columns were added to the statistics"
romicsVolcano(romics_proteins,p_type = "p",min_fold_change = 0)
## [1] "'stat_type' was missing 't.test' were used by default"
Ttests were also calculated by level of IPF
romics_proteins<-romicsChangeFactor(romics_proteins,main_factor = "disease_lvl")
romics_proteins<-romicsTtest(romics_proteins,var.equal = F)
## [1] "T_test columns were added to the statistics"
romicsVolcano(romicsChangeIDs(romics_proteins,newIDs = "representative_gene"),p_type = "p",
min_fold_change = 0,
plot_type = "plotly",
)
## [1] "'stat_type' was missing 't.test' were used by default"
romics_proteins<-romicsChangeFactor(romics_proteins,main_factor = "disease_status")
As there was a strong relation The list of consensus HDL proteins can be found there:https://homepages.uc.edu/~davidswm/HDLproteome.html We uploaded the HDL database and evaluated if our clusters were enriched in those proteins these are added to the HDL associated GO term.
HDL<-read.csv(file = "01_source_files/HDLProteomeList2020.csv")
GO_HDL<-data.frame( Uniprot_Accession=HDL$Acc...,Uniprot_ID=HDL$Abbrev.,GO_accession="34364",GO_description="high-density lipoprotein particle", GO_type="GO_CC")
UniProtTable_GO<-rbind(UniProtTable_GO,GO_HDL)
An enrichment analysis was performed for these two clusters generated
Clust1<-rownames(romics_proteins$statistics)[romics_proteins$statistics$hclust_clusters==1&!is.na(romics_proteins$statistics$hclust_clusters)]
Clust2<-rownames(romics_proteins$statistics)[romics_proteins$statistics$hclust_clusters==2&!is.na(romics_proteins$statistics$hclust_clusters)]
Universe<-rownames(romics_proteins$statistics)
Clust1<-sub(".*\\;","",Clust1)
Clust2<-sub(".*\\;","",Clust2)
Universe<-sub(".*\\;","",Universe)
Clust1_Enr<-cbind(enriched_in="Clust1",Enrich_EASE(Clust1,Universe))
## [1] "2023-10-16 14:33:14 : GO enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 1150 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
## [1] "2023-10-16 14:33:16 : KEGG enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 1150 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
## [1] "2023-10-16 14:33:16 : REACTOME enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 1150 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
Clust2_Enr<-cbind(enriched_in="Clust2",Enrich_EASE(Clust2,Universe))
## [1] "2023-10-16 14:33:17 : GO enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 91 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
## [1] "2023-10-16 14:33:17 : KEGG enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 91 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
## [1] "2023-10-16 14:33:17 : REACTOME enrichment stated."
## [1] "Your <query> contained 0 UniProt_IDs and 91 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1825 UniProt_Accession (some might be redundant)."
Enrichments <-enrfilter(rbind(Clust1_Enr,Clust2_Enr),pval_type = "pval",foldchange = 1,min_feature = 2 )
write.table(Enrichments,file = "03_output_files/Enrichments_ANOVA_clusters.txt",row.names = F, sep="\t")
DT::datatable(Enrichments)
Plots were generated for specific proteins.
Figs<-romicsChangeIDs(romics_proteins,newIDs = "representative_gene")
library("ggpubr")
#APOA1
singleVariablePlot(Figs,variable = "APOA1")+ geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 9, label =formatC(Figs$statistics[grepl("APOA1",rownames(Figs$statistics)),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#APOA4
singleVariablePlot(Figs,variable = "APOA4")+ geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics[grepl("APOA4",rownames(Figs$statistics)),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#APOL1
singleVariablePlot(Figs,variable ="APOL1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 0, label =formatC(Figs$statistics[grepl("APOL1",rownames(Figs$statistics)),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#Complement C3
singleVariablePlot(Figs,variable ="C3")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 8, label =formatC(Figs$statistics["C3"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#Serum Amyloid A4
singleVariablePlot(Figs,variable ="SAA2-SAA4")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics["SAA2-SAA4"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#Fibrinogen alpha chain
singleVariablePlot(Figs,variable ="FGA",title = "fibrinogen alpha chain")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 8, label =formatC(Figs$statistics["FGA"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#Haptoglobin
singleVariablePlot(Figs,variable ="HP",title = "haptoglobin")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 9, label =formatC(Figs$statistics["HP"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#kininogen-1
singleVariablePlot(Figs,variable ="KNG1",title = "haptoglobin")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics["KNG1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#Clusterin
singleVariablePlot(Figs,variable ="CLU",title = "Clusterin")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["CLU"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#Caveolin-1
singleVariablePlot(Figs,variable ="CAV1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 5, label =formatC(Figs$statistics["CAV1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#Caveolin-2
singleVariablePlot(Figs,variable ="CAV2")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["CAV2"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#PICALM
singleVariablePlot(Figs,variable ="PICALM")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["PICALM"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#PACSIN2
singleVariablePlot(Figs,variable ="PACSIN2")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["PACSIN2"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#alpha-1-anti-trypsin
singleVariablePlot(Figs,variable ="SERPINA1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["SERPINA1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#HDL receptor related protein 1
singleVariablePlot(Figs,variable ="LRP1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["LRP1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#PRX
singleVariablePlot(Figs,variable ="PRX")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["PRX"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#CA4
singleVariablePlot(Figs,variable ="CA4")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 3.5, label =formatC(Figs$statistics["CA4"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#AGER
singleVariablePlot(Figs,variable ="AGER")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["AGER"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#SCEL
singleVariablePlot(Figs,variable ="SCEL")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 3, label =formatC(Figs$statistics["SCEL"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#ABCA3
singleVariablePlot(Figs,variable ="ABCA3")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2.5, label =formatC(Figs$statistics["SCEL"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#SFTPC
singleVariablePlot(Figs,variable ="SFTPC")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["SFTPC"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#HBB
singleVariablePlot(Figs,variable ="HBB")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 13, label =formatC(Figs$statistics["HBB"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#HBB
p11<-singleVariablePlot(Figs,variable ="HBB")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 13, label =formatC(Figs$statistics["HBB"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#COL14A1
p12<-singleVariablePlot(Figs,variable ="COL14A1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6.5, label =formatC(Figs$statistics["COL14A1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#SFTPC
singleVariablePlot(Figs,variable ="SFTPC")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 1, label =formatC(Figs$statistics["SFTPC"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#MFAP5
singleVariablePlot(Figs,variable ="MFAP5")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 3, label =formatC(Figs$statistics["MFAP5"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#uteroglobin
singleVariablePlot(Figs,variable ="SCGB1A1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["SCGB1A1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#THY1
singleVariablePlot(Figs,variable ="THY1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["THY1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#SYNPO2
p9<-singleVariablePlot(Figs,variable ="SYNPO2")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["SYNPO2"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#DES
p10<-singleVariablePlot(Figs,variable ="DES")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["DES"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#ACTA2/a-SMA
singleVariablePlot(Figs,variable ="ACTA2")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["ACTA2"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#to be finished
#COL1A1
singleVariablePlot(Figs,variable ="COL1A1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 7, label =formatC(Figs$statistics["COL1A1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#COL3A1
singleVariablePlot(Figs,variable ="COL3A1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics["COL3A1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#COL15A1
singleVariablePlot(Figs,variable ="COL15A1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["COL15A1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#TGFB1
singleVariablePlot(Figs,variable ="TGFBI")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["TGFBI"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#TGFB1
singleVariablePlot(Figs,variable ="TGFBI")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 2, label =formatC(Figs$statistics["TGFBI"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#TGFB1I1
singleVariablePlot(Figs,variable ="TGFB1I1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 3.5, label =formatC(Figs$statistics["TGFB1I1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#CD14
singleVariablePlot(Figs,variable ="CD14")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 3.5, label =formatC(Figs$statistics["CD14"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#Antileukoproteinase
singleVariablePlot(Figs,variable ="SLPI")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 6, label =formatC(Figs$statistics["SLPI"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#Neutrophil defensin 3
singleVariablePlot(Figs,variable ="DEFA3")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 8, label =formatC(Figs$statistics["DEFA3"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#Chemoatractants
#Macrophage migration inhibitory factor
singleVariablePlot(Figs,variable ="MIF")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics["MIF"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
#High mobility group protein B1
singleVariablePlot(Figs,variable ="HMGB1")+
geom_bracket(xmin = "Ctrl", xmax = "IPF", y.position = 4, label =formatC(Figs$statistics["HMGB1"==rownames(Figs$statistics),colnames(Figs$statistics)=="ANOVA_padj"], format = "e"))
Enrichments were performed for the comparison IPF1/Ctrl
IPF1_up<-rownames(romics_proteins$statistics)[romics_proteins$statistics$IPF1_vs_Ctrl_Ttest_p<0.05&romics_proteins$statistics$`log(IPF1/Ctrl)`>0]
IPF1_down<-rownames(romics_proteins$statistics)[romics_proteins$statistics$IPF1_vs_Ctrl_Ttest_p<0.05&romics_proteins$statistics$`log(IPF1/Ctrl)`<0]
universe<-rownames(romics_proteins$statistics)
IPF1_up_GO<-cbind(Enriched_in="up_in_IPF1", UniProt_GO_EASE(IPF1_up,universe))
## [1] "64 proteins of your <query> list had more than one identifier, only the first one was conserved"
## [1] "1569 proteins of your <universe> list had more than one identifier, only the first one was conserved"
## [1] "Your <query> contained 0 UniProt_IDs and 74 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
IPF1_down_GO<-cbind(Enriched_in="down_in_IPF1",UniProt_GO_EASE(IPF1_down,universe))
## [1] "1053 proteins of your <query> list had more than one identifier, only the first one was conserved"
## [1] "1569 proteins of your <universe> list had more than one identifier, only the first one was conserved"
## [1] "Your <query> contained 0 UniProt_IDs and 1268 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
IPF1_up_KEGG<-cbind(Enriched_in="up_in_IPF1",UniProt_KEGG_EASE(IPF1_up,universe))
## [1] "64 proteins of your <query> list had more than one identifier, only the first one was conserved"
## [1] "1569 proteins of your <universe> list had more than one identifier, only the first one was conserved"
## [1] "Your <query> contained 0 UniProt_IDs and 74 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
IPF1_down_KEGG<-cbind(Enriched_in="down_in_IPF1",UniProt_KEGG_EASE(IPF1_down,universe))
## [1] "1053 proteins of your <query> list had more than one identifier, only the first one was conserved"
## [1] "1569 proteins of your <universe> list had more than one identifier, only the first one was conserved"
## [1] "Your <query> contained 0 UniProt_IDs and 1268 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
Enrichments_IPF1_ctrl<- rbind(IPF1_up_GO,IPF1_down_GO,IPF1_up_KEGG,IPF1_down_KEGG)
Enrichments_IPF1_ctrl <- Enrichments_IPF1_ctrl[Enrichments_IPF1_ctrl$pval<0.05&Enrichments_IPF1_ctrl$fold_change>1,]
DT::datatable(Enrichments_IPF1_ctrl)
write.table(Enrichments_IPF1_ctrl,"./03_output_files/Enrichment_early_IPF.txt",row.names = F,sep="\t")
Now we will look for proteins with a trend relationship between the surface density and the protein abundances, the controls were removed to avoid biasing the linearity
romics_IPF<-romicsTrend(romics_IPF,factor = "surface_density",log_factor = FALSE,type = "linear",p = 0.05)
## [1] "The Statistics layer was added to your object"
## [1] "The trend analysis columns were added to the statistics"
romicsTrendHeatmap(romics_IPF,log_factor = FALSE,labRow = FALSE,margins=c(10,5))
Now for these two trends we will perform an enrichment analysis to identify function associated with these trends
linear_increasing<-gsub("\\;.*","",rownames(romics_IPF$statistics)[romics_IPF$statistics$best_fitted_trend=="linear_increasing"])
linear_decreasing<-gsub("\\;.*","",rownames(romics_IPF$statistics)[romics_IPF$statistics$best_fitted_trend=="linear_decreasing"])
universe<-gsub("\\;.*","",rownames(romics_IPF$statistics))
linear_increasing_KEGG<-UniProt_KEGG_EASE(linear_increasing,universe)
## [1] "Your <query> contained 0 UniProt_IDs and 74 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
linear_decreasing_KEGG<-UniProt_KEGG_EASE(linear_decreasing,universe)
## [1] "Your <query> contained 0 UniProt_IDs and 270 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
linear_increasing_GO<-UniProt_GO_EASE(linear_increasing,universe)
## [1] "Your <query> contained 0 UniProt_IDs and 74 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
linear_decreasing_GO<-UniProt_GO_EASE(linear_decreasing,universe)
## [1] "Your <query> contained 0 UniProt_IDs and 270 UniProt_Accession (some might be redundant)."
## [1] "Your <universe> contained 0 UniProt_IDs and 1894 UniProt_Accession (some might be redundant)."
linear_increasing_KEGG<-cbind(Cluster=rep("linear_increasing_KEGG",nrow(linear_increasing_KEGG)),linear_increasing_KEGG)
linear_decreasing_KEGG<-cbind(Cluster=rep("linear_decreasing_KEGG",nrow(linear_decreasing_KEGG)),linear_decreasing_KEGG)
linear_increasing_GO<-cbind(Cluster=rep("linear_increasing_GO",nrow(linear_increasing_GO)),linear_increasing_GO)
linear_decreasing_GO<-cbind(Cluster=rep("linear_decreasing_GO",nrow(linear_decreasing_GO)),linear_decreasing_GO)
Enrichments_trends <- rbind(linear_increasing_GO,linear_decreasing_GO,linear_increasing_KEGG,linear_decreasing_KEGG)
Enrichments_trends <- Enrichments_trends[Enrichments_trends$pval<0.1&Enrichments_trends$fold_change>1,]
DT::datatable(Enrichments_trends)
write.table(Enrichments_trends,"./03_output_files/Enrichment_trends.txt",row.names = F,sep="\t")
Examples of proteins mapped to trends.
singleVariableTrend(romics_IPF,"Q15109",title = "Rage/Ager(AT1)",log_factor = F)
singleVariableTrend(romics_IPF,"H3BTN5",title = "Pyruvate Kinase",log_factor = F)
singleVariableTrend(romics_IPF,"Q13751",title = "Laminin subunit beta-3",log_factor = F)
singleVariableTrend(romics_IPF,"O14558",title = "Heat shock protein beta-6",log_factor = F)
singleVariableTrend(romics_IPF,"Q05682",title = "Caldesmon",log_factor = F)
The final R object was finally exported
save(romics_IPF,file = "03_output_files/Romics_object.Rda")